##############################
##     run bootstrap on     ##
##  cleaned cces and output ##
## matrices of distribution ##
##       of opinion by      ##
##        party/class       ##
##############################

# Start from an empty workspace: removes every visible object in the current
# environment (it does not detach packages or reset options).
rm(list = ls())

# setwd('~/../../Class+Party/')

# Put the project-local package library first on the library search path.
# The path is relative to the working directory the script is run from.
.libPaths('../rpackages')

library(tidyverse)

# NOTE(review): presumably defines subset_unique_questions() and
# boot_sample_cces(), which are called below but not defined in this file --
# confirm.
source('bootstrap_funs.R')

# Silence dplyr's "summarise() has grouped output by ..." messages.
options(dplyr.summarise.inform = FALSE)

# Two disjoint sets of 25 state postal codes each, used further down to build
# the `stateinc` grouping ('richstate' / 'poorstate'). Any `state` value that
# is in neither vector (DC, for example, is in neither) ends up with a missing
# `stateinc`.
# NOTE(review): the ranking these halves are based on is not recorded in this
# file -- presumably a state income measure; confirm the source.
richstates <- c(
  'MD', 'MA', 'NJ', 'HI', 'CA', 'CT', 'WA', 'NH', 'CO', 'VA',
  'UT', 'AK', 'MN', 'NY', 'RI', 'DE', 'IL', 'OR', 'WY', 'ND',
  'WI', 'TX', 'PA', 'NV', 'NE'
)

poorstates <- c(
  'VT', 'KS', 'AZ', 'GA', 'IA', 'ID', 'MI', 'SD', 'FL', 'ME',
  'OH', 'IN', 'MO', 'NC', 'MT', 'SC', 'TN', 'OK', 'KY', 'NM',
  'AL', 'LA', 'AR', 'WV', 'MS'
)

# Read the pooled survey file and discard every row from the 2010 wave
# (rows with a missing `year` are discarded by the same filter).
cces_allyears <- filter(readRDS('cces_allyears.rds'), year != 2010)

# subset_unique_questions() is not defined in this file.
# NOTE(review): presumably keeps a single copy of each question -- confirm.
cces_allyears <- subset_unique_questions(cces_allyears)

# Recode party and attach the state-income group.
#  * pid3: respondents who give a party other than 'Independent' on pid3 but
#    are leaners on pid7 are moved to 'Independent'. Every other row keeps its
#    pid3 value (the first two branches map Democrat/Republican to themselves).
#    NOTE(review): a respondent whose pid3 and pid7 point to opposite parties
#    without being a leaner also falls through to `TRUE ~ pid3` and keeps
#    pid3 -- confirm that is the intended handling of cross-party identifiers.
#  * stateinc: 'richstate' / 'poorstate' by membership in the two state
#    vectors; missing for states in neither vector.
cces_allyears <- mutate(
  cces_allyears,
  pid3 = case_when(
    pid3 == 'Democrat' &
      pid7 %in% c('Strong Democrat', 'Not very strong Democrat') ~ 'Democrat',
    pid3 == 'Republican' &
      pid7 %in% c('Strong Republican', 'Not very strong Republican') ~ 'Republican',
    pid3 != 'Independent' &
      pid7 %in% c('Lean Democrat', 'Lean Republican') ~ 'Independent',
    TRUE ~ pid3
  ),
  stateinc = case_when(
    state %in% richstates ~ 'richstate',
    state %in% poorstates ~ 'poorstate'
  )
)

# Lookup table of the distinct (question, year, topic code) combinations,
# ordered by question.
issuetopics <- cces_allyears %>% 
  select(question, year, pap_topic, topic_3, topic_6) %>% 
  distinct() %>% 
  arrange(question)

# The output matrices have one row per row of `issuetopics`, labelled by
# question, so a question appearing on more than one row would produce
# duplicated row labels and a row count that no longer equals the number of
# questions.
if (anyDuplicated(issuetopics$question) > 0) {
  warning('issuetopics contains duplicated question ids; ',
          'bootstrap matrix rows will not map one-to-one to questions',
          call. = FALSE)
}

# Number of bootstrap replicates (= number of columns of every output matrix).
n_boot <- 1000

# Names of the output matrices: one per party x income group x state group,
# e.g. 'dem_inctop_richstate_boot'.
output_tables <- expand.grid(party = c('dem', 'rep', 'all'),
                             income = c('inctop', 'incmiddle', 'incbottom'),
                             stateinc = c('richstate', 'poorstate'))

output_tables <- paste(output_tables$party, output_tables$income, output_tables$stateinc, 'boot', sep = '_')

# Pre-allocate one question x replicate matrix per name, as a global variable
# of that name.
#  * Row names reuse the arrange()d order of `issuetopics` rather than
#    re-sorting with base sort(): arrange() (C locale by default in
#    dplyr >= 1.1) and sort() (session locale) can order strings differently,
#    which would attach row labels to the wrong rows.
#  * Cells start as NA_real_ so every matrix is numeric even if never filled.
for (i in output_tables) {
  assign(i, matrix(NA_real_,
                   nrow = nrow(issuetopics),
                   ncol = n_boot,
                   dimnames = list(as.character(issuetopics$question),
                                   seq_len(n_boot))))
}


# ---------------------------------------------------------------------------
# Bootstrap loop.
# For each of the n_boot replicates: draw a bootstrap sample, compute the mean
# of `opinion` per question within every party x income group x state group
# cell, and store the result in column `s` of the matching pre-allocated
# matrix.
# ---------------------------------------------------------------------------
set.seed(12345)
ptm <- proc.time()

# Progress output: a '|' roughly every 1% of the run, and the replicate number
# plus elapsed minutes roughly every 10%. max(1, .) keeps the modulus non-zero
# when n_boot is below 100 (s %% 0 is NaN and would make `if` fail).
tick_major <- max(1, floor(n_boot / 10))
tick_minor <- max(1, floor(n_boot / 100))

# Translate each output matrix name into the group label built inside the
# loop, e.g. 'dem_inctop_richstate_boot' -> 'Democrat_top_richstate'.
party_labels <- c(dem = 'Democrat', rep = 'Republican', all = 'All')
group_cols <- vapply(
  strsplit(output_tables, '_', fixed = TRUE),
  function(parts) {
    paste(party_labels[[parts[1]]], sub('^inc', '', parts[2]), parts[3],
          sep = '_')
  },
  character(1)
)
names(group_cols) <- output_tables

# Fill the pre-allocated matrices through one named list (written back to the
# individual variables after the loop). Every matrix named in `output_tables`
# is filled, including the middle-income ones.
boot_tables <- mget(output_tables)
question_ids <- rownames(boot_tables[[1]])

for (s in seq_len(n_boot)) {
  if (s %% tick_major == 0) {
    cat('|', s, (proc.time() - ptm)['elapsed'] / 60, '\n')
  } else if (s %% tick_minor == 0) {
    cat('|')
  }

  # boot_sample_cces() is not defined in this file.
  # NOTE(review): meaning of `quantile = 0.1` is not visible here -- confirm.
  boot_cces <- boot_sample_cces(cces_allyears, quantile = 0.1) %>%
    # Collapse party to Democrat / Republican / 'Independent/Other'; any other
    # pid3 value (including NA) becomes NA.
    mutate(pid_new = case_when(
      pid3 %in% c('Republican', 'Democrat') ~ as.character(pid3),
      pid3 %in% c('Independent', 'Other') ~ 'Independent/Other'
    )) %>%
    select(-pid3) %>%
    drop_na(opinion) %>%
    # class by state-income analysis
    # Branch order keeps the original precedence when more than one indicator
    # is set: middle, then top, then bottom.
    mutate(faminc_group = case_when(
      faminc_middle == 1 ~ 'middle',
      faminc_top == 1 ~ 'top',
      faminc_bottom == 1 ~ 'bottom'
    ))

  # Mean opinion per question for Democrats and Republicans separately ...
  by_party <- boot_cces %>%
    filter(pid_new != 'Independent/Other') %>%
    drop_na(pid_new, faminc_group, stateinc) %>%
    group_by(pid_new, faminc_group, stateinc, question) %>%
    summarize(opinion = sum(opinion) / n(), .groups = 'drop')

  # ... and for all respondents pooled (any party, including missing).
  pooled <- boot_cces %>%
    drop_na(faminc_group, stateinc) %>%
    group_by(faminc_group, stateinc, question) %>%
    summarize(opinion = sum(opinion) / n(), .groups = 'drop') %>%
    mutate(pid_new = 'All')

  # One row per question, one column per party_income_state group.
  group_opinions <- bind_rows(by_party, pooled) %>%
    mutate(party_income = paste(pid_new, faminc_group, stateinc, sep = '_')) %>%
    select(question, party_income, opinion) %>%
    pivot_wider(names_from = party_income, values_from = opinion)

  # Align by question id rather than by position, so a replicate that lacks a
  # question (or a whole group) yields NA in that cell instead of an error or
  # shifted rows.
  row_idx <- match(question_ids, group_opinions$question)

  for (tbl in output_tables) {
    col <- group_cols[[tbl]]
    boot_tables[[tbl]][, s] <- if (col %in% names(group_opinions)) {
      group_opinions[[col]][row_idx]
    } else {
      NA_real_
    }
  }
}

# Publish the filled matrices under their individual variable names.
for (tbl in output_tables) {
  assign(tbl, boot_tables[[tbl]])
}

# Total run time in minutes.
print((proc.time() - ptm)/60)

# Bundle the question/topic lookup table and every bootstrap matrix into one
# named list and write it to disk.
# mget() always returns a named list (sapply() may simplify its result) and
# fails loudly if any expected object is missing; listing the matrices via
# `output_tables` avoids picking up unrelated objects that merely share the
# all_/rep_/dem_ prefix. Names are sorted so the matrices appear in
# alphabetical order after 'issuetopics'.
output <- mget(c('issuetopics', sort(output_tables)))

saveRDS(output,
        'opinion_bootstrapped_noleaners_richstatepoorstate.RDS')